# Loading packages and data
library(ggplot2)
library(ecodata)
library(lubridate)
library(dplyr)
library(stringr)
library(marmap) # bathymetry
library(RColorBrewer)
library(ggnewscale)
library(sf)
library(cowplot)
library(tidyverse)
library(ggpubr)
library(sf)
library(ggdist)
library(ggpubr)
library(wesanderson)
library(raster)
#library(glmTMB)
library(ggpmisc)
library(mgcViz)
library(gratia)

# CPUE data (no env covariates)
gt_data_model_cpue <- read.csv(here::here('data/catch_data/gt_data_model_cpue.csv'))
#names(gt_data_model_cpue) <- tolower(names(gt_data_model_cpue))
# Add in column with cpue 
# note: Paul indicated to use small mesh
gt_data_model_cpue <- gt_data_model_cpue %>% 
       rename_all(., .funs = tolower) %>% 
  mutate(mesh_bin = case_when(mesh_size <= 5.6 ~ 'SM',mesh_size >= 5.6 ~ 'LG',
                                                      TRUE ~ 'NA')) %>%
  mutate(cpue_hr = sum_gt_catch/effort_dur)

# Catch data: 
sfobs <-readRDS(here::here('data/catch_data/gold_tile_sf_ob_v1_temp_price.rds'))

sfob.env <- sfobs %>% 
  mutate(mesh_bin = case_when(mesh_size <= 5.6 ~ 'SM', mesh_size >= 5.6 ~ 'LG',
                              TRUE ~ 'NA'),
         cpue_hr = SUM_GT_CATCH/effort_dur) %>% 
  filter(YEAR %in% c(1998:2022) & mesh_bin == 'SM') %>%
  dplyr::select(DATE, YEAR, MONTH, YDAY,trip_id,hull_num, area, effort_dur, 
         SUM_GT_CATCH, cpue_hr, mesh_size, mesh_bin, depth, start_lat, start_lon,
         bottomT, bottomT_avg, MIN_TEMP_C, MEAN_TEMP_C, MAX_TEMP_C,
         TEMP_VARIANCE, TEMP_DEVIATION, MEAN_DPTH_M, tri, sed) %>% 
  mutate(YEAR = as.integer(YEAR)) %>% 
  rename_all(., .funs = tolower)


areas <- sort(unique(sfob.env$area))
catch.tally.ann <- sfob.env %>% # aggregate by year
  group_by(year) %>% 
  summarise(ttl_sum = sum(sum_gt_catch))

# Length data from observer program 
lengths <- read.csv(here::here('data/catch_data/gt_data_length_andy.csv')) 
names(lengths) <- tolower(names(lengths))

# Recruitment estimates from 2021 report
recruit <- read.csv(here::here('data/assessment_data/tilefish_rec_estimate_2021.csv'))

# Merge SF/Obs catch data with recruit estimates:
catch_recruit <- cbind(recruit %>% filter(year %in% c(1998:2020)),
                       catch.tally.ann %>%
                         filter(year %in% c(1998:2020)) %>%
                         dplyr::select(ttl_sum))

# loading in shape files for maps
US.areas <- st_read(here::here('shapefiles/USA.shp'), quiet = TRUE)
canada.areas <- st_read(here::here('shapefiles/Canada.shp'), quiet = TRUE)
bts_strata <- st_read(here::here('shapefiles/NES_BOTTOM_TRAWL_STRATA.shp'),
                      quiet = TRUE)
# plot(bts_strata) # to see all bottom trawl strata

gtf_strata <- bts_strata %>% 
  filter(STRATUMA %in% c('01030', '01040', '01070', '01080', '01110', '01120', 
                         '01140', '01150', '01670', '01680', '01710', '01720', 
                         '01750', '01760')) # select just the gtf strata
# plot(gtf_strata)
bathy <- marmap::getNOAA.bathy(-81,-58, 27, 46)
bathy = fortify.bathy(bathy)

Recruitment estimate

Figure 3. Age-1 recruitment estimate from the 2021 tilefish assessment across all years

ggplot(recruit, aes(x = factor(year), y = recruit_est, group = 1))+
   geom_rect(aes(xmin = '2007', xmax = '2022', ymin = -Inf, ymax = Inf), 
            fill = 'red', alpha = 0.02) +
  geom_rect(aes(xmin = '2000', xmax = '2022', ymin = -Inf, ymax = Inf), 
            fill = 'lightblue', alpha = 0.05) +
   geom_vline(xintercept = c('1993','1999', '2005', '2013'), lty = 2) +
  geom_line(color = 'black', size = 1.5) +
  annotate("text", label = "*",
    x = 26, y = 14000, size = 8, colour = "red" )+
  xlab('Year') + 
  ylab('Total sum tilefish catch') + 
  # facet_wrap(~month)+
  theme(axis.text.x = element_text(color = 'black',
                                   size = 12, angle = 45, vjust = 1, hjust=1)) +
  ecodata::theme_facet()

### Thought: Should we isolate years associated w/strong year classes (or bad)
###           for correlations and analyses? 
SST
# SST
sst<-read.csv(here::here('data/sst/sst_ts_gtf_strata.csv'))
# SST with year lag
sst.lag <- sst %>%
  mutate(mean_sst_lag1 = lag(weighted_mean_sst,12))
         
# Join with recruit estimate
df <- dplyr::full_join(recruit, sst.lag %>%
                   group_by(year) %>% 
                   filter(year %in% c(2000:2020)),
dplyr::select(year, month, recruit.est, mean_sst, weighted_mean_sst, mean_sst_lag1),
                 by = join_by(year))
sst.rec <- df[-c(1:29),] #removes year < 2000 (when SST data begins)

#by season
sst_winter <- filter(sst.rec, month %in% c(1,2,3))
sst_spring <- filter(sst.rec, month %in% c(4,5,6))
sst_summer <- filter(sst.rec, month %in% c(7,8,9))
sst_fall <- filter(sst.rec, month %in% c(10,11,12))

## Winter
lm_winter<-lm(weighted_mean_sst ~ recruit_est, data=sst_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = weighted_mean_sst ~ recruit_est, data = sst_winter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4490 -1.1651  0.0387  0.8677  3.8249 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.620e+00  4.455e-01  19.349   <2e-16 ***
## recruit_est 1.720e-07  2.991e-07   0.575    0.568    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.657 on 60 degrees of freedom
## Multiple R-squared:  0.005477,   Adjusted R-squared:  -0.0111 
## F-statistic: 0.3304 on 1 and 60 DF,  p-value: 0.5676
cor.test(sst_winter$recruit_est, sst_winter$weighted_mean_sst, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst_winter$recruit_est and sst_winter$weighted_mean_sst
## t = 0.57483, df = 60, p-value = 0.5676
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1790712  0.3178990
## sample estimates:
##        cor 
## 0.07400706
ggscatter(sst_winter, x = "recruit_est", y = "weighted_mean_sst", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean SST",
          title="Winter")

sst.rec %>% filter(month %in% c(12,1,2)) %>% 
  ggplot(aes(x = "recruit_est", y = "weighted_mean_sst",fill =month))+
           geom_point()

ggscatter(sst_winter, x = "recruit_est", y = "weighted_mean_sst", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean SST",
          title="Winter")

## Spring
lm_spring<-lm(weighted_mean_sst ~ recruit_est, data=sst_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = weighted_mean_sst ~ recruit_est, data = sst_spring)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1147 -3.2854 -0.5526  3.7471  7.0917 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.216e+01  1.012e+00  12.020   <2e-16 ***
## recruit_est 3.556e-07  6.644e-07   0.535    0.594    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.828 on 61 degrees of freedom
## Multiple R-squared:  0.004674,   Adjusted R-squared:  -0.01164 
## F-statistic: 0.2865 on 1 and 61 DF,  p-value: 0.5944
cor.test(sst_spring$recruit_est, sst_spring$weighted_mean_sst, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst_spring$recruit_est and sst_spring$weighted_mean_sst
## t = 0.53523, df = 61, p-value = 0.5944
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1824868  0.3108685
## sample estimates:
##        cor 
## 0.06836944
ggscatter(sst_spring, x = "recruit_est", y = "weighted_mean_sst", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Spring Weighted Mean SST",
          title="Spring")

## Summer
lm_summer<-lm(weighted_mean_sst ~ recruit_est, data=sst_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = weighted_mean_sst ~ recruit_est, data = sst_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.51168 -0.79996  0.01489  0.83898  2.73702 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.280e+01  3.071e-01   74.25   <2e-16 ***
## recruit_est 5.850e-08  2.017e-07    0.29    0.773    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.162 on 61 degrees of freedom
## Multiple R-squared:  0.001377,   Adjusted R-squared:  -0.01499 
## F-statistic: 0.08411 on 1 and 61 DF,  p-value: 0.7728
cor.test(sst_summer$recruit_est, sst_summer$weighted_mean_sst, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst_summer$recruit_est and sst_summer$weighted_mean_sst
## t = 0.29003, df = 61, p-value = 0.7728
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2126115  0.2822781
## sample estimates:
##        cor 
## 0.03710834
ggscatter(sst_summer, x = "recruit_est", y = "weighted_mean_sst", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean SST",
          title="Summer")

## Fall
lm_fall<-lm(weighted_mean_sst ~ recruit_est, data=sst_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = weighted_mean_sst ~ recruit_est, data = sst_fall)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2757 -2.4013 -0.3087  2.5850  5.1703 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.617e+01  8.010e-01  20.191   <2e-16 ***
## recruit_est -2.495e-07  5.262e-07  -0.474    0.637    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.032 on 61 degrees of freedom
## Multiple R-squared:  0.003674,   Adjusted R-squared:  -0.01266 
## F-statistic: 0.2249 on 1 and 61 DF,  p-value: 0.637
cor.test(sst_fall$recruit_est, sst_fall$weighted_mean_sst, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst_fall$recruit_est and sst_fall$weighted_mean_sst
## t = -0.47428, df = 61, p-value = 0.637
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3038162  0.1900047
## sample estimates:
##         cor 
## -0.06061386
ggscatter(sst_fall, x = "recruit_est", y = "weighted_mean_sst", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Fall Weighted Mean SST",
          title="Fall")

## One year lag - Summer (peak spawn)
lm_lag<-lm(mean_sst_lag1 ~ recruit_est, data=sst_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = mean_sst_lag1 ~ recruit_est, data = sst_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.38712 -0.80602  0.09212  0.85112  2.75231 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.295e+01  3.301e-01   69.53   <2e-16 ***
## recruit_est -6.760e-08  2.329e-07   -0.29    0.773    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.173 on 58 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.001451,   Adjusted R-squared:  -0.01577 
## F-statistic: 0.08425 on 1 and 58 DF,  p-value: 0.7727
cor.test(sst_summer$recruit_est, sst_summer$mean_sst_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst_summer$recruit_est and sst_summer$mean_sst_lag1
## t = -0.29026, df = 58, p-value = 0.7727
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2892133  0.2179469
## sample estimates:
##         cor 
## -0.03808556
ggscatter(sst_summer, x = "recruit_est", y = "mean_sst_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean SST",
          title="One Year Lag")

Here we have SST in recruitment year - by month (updated 12/19)

df = sst.rec %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(sst = mean(weighted_mean_sst),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=sst, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title =' Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+

   #facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(sst = mean(weighted_mean_sst),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=sst, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = ' Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Here we have SST lagged by 1 year - by month (updated 12/19)

df %>% group_by(year, season) %>% 
  summarize(sst = mean(mean_sst_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=sst, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(sst = mean(mean_sst_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=sst, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Bottom Temperature
Bottom T analysis

The following figures compare in-situ bottom temperature from the study-fleet data set to the recruitment estimate.

  • Note here temperatures are averaged across all depths > 50 for each month.

Stephanie- Updated BT

bt <- read.csv(here::here('data/bottom_temp/bt_ts_gtf_1970_2023.csv'))

# BT with year lag
bt.lag <- bt %>%
  mutate(mean_bt_lag1 = lag(weighted_mean_bt,12))
         
# Join with recruit estimate
bt.rec <- dplyr::full_join(recruit, bt.lag, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, weighted_mean_bt, mean_bt_lag1) %>%
  tidyr::drop_na()

#by season
bt_winter <- filter(bt.rec, month %in% c(1,2,3))
bt_spring <- filter(bt.rec, month %in% c(4,5,6))
bt_summer <- filter(bt.rec, month %in% c(7,8,9))
bt_fall <- filter(bt.rec, month %in% c(10,11,12))

## One year lag - Summer (peak spawn)
lm_lag<-lm(mean_bt_lag1 ~ recruit_est, data=bt_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = mean_bt_lag1 ~ recruit_est, data = bt_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.06292 -0.51469 -0.03642  0.58348  2.34287 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.132e+01  1.607e-01  70.446   <2e-16 ***
## recruit_est 4.113e-08  9.760e-08   0.421    0.674    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8584 on 148 degrees of freedom
## Multiple R-squared:  0.001198,   Adjusted R-squared:  -0.00555 
## F-statistic: 0.1776 on 1 and 148 DF,  p-value: 0.6741
cor.test(bt_summer$recruit_est, bt_summer$mean_bt_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  bt_summer$recruit_est and bt_summer$mean_bt_lag1
## t = 0.42137, df = 148, p-value = 0.6741
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1263470  0.1938018
## sample estimates:
##        cor 
## 0.03461544
ggscatter(bt_summer, x = "recruit_est", y = "mean_bt_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean BT",
          title="One Year Lag")

## Winter
lm_winter<-lm(weighted_mean_bt ~ recruit_est, data=bt_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = weighted_mean_bt ~ recruit_est, data = bt_winter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3049 -0.5043  0.0847  0.6136  2.3256 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.103e+01  1.922e-01  57.362   <2e-16 ***
## recruit_est -7.229e-08  1.167e-07  -0.619    0.537    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.027 on 148 degrees of freedom
## Multiple R-squared:  0.002586,   Adjusted R-squared:  -0.004154 
## F-statistic: 0.3837 on 1 and 148 DF,  p-value: 0.5366
cor.test(bt_winter$recruit_est, bt_winter$weighted_mean_bt, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  bt_winter$recruit_est and bt_winter$weighted_mean_bt
## t = -0.6194, df = 148, p-value = 0.5366
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2094032  0.1103123
## sample estimates:
##         cor 
## -0.05084811
ggscatter(bt_winter, x = "recruit_est", y = "weighted_mean_bt", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean BT",
          title="Winter")

## Spring
lm_spring<-lm(weighted_mean_bt ~ recruit_est, data=bt_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = weighted_mean_bt ~ recruit_est, data = bt_spring)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.10071 -0.52654  0.05163  0.58828  1.89777 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.069e+01  1.723e-01  62.028   <2e-16 ***
## recruit_est -9.889e-10  1.046e-07  -0.009    0.992    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9201 on 148 degrees of freedom
## Multiple R-squared:  6.038e-07,  Adjusted R-squared:  -0.006756 
## F-statistic: 8.937e-05 on 1 and 148 DF,  p-value: 0.9925
cor.test(bt_spring$recruit_est, bt_spring$weighted_mean_bt, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  bt_spring$recruit_est and bt_spring$weighted_mean_bt
## t = -0.0094533, df = 148, p-value = 0.9925
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1610185  0.1595043
## sample estimates:
##           cor 
## -0.0007770595
ggscatter(bt_spring, x = "recruit_est", y = "weighted_mean_bt", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Spring Weighted Mean BT",
          title="Spring")

## Summer
lm_summer<-lm(weighted_mean_bt ~ recruit_est, data=bt_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = weighted_mean_bt ~ recruit_est, data = bt_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08669 -0.49696 -0.03112  0.56100  2.31676 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.137e+01  1.606e-01  70.823   <2e-16 ***
## recruit_est 2.023e-08  9.750e-08   0.208    0.836    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8575 on 148 degrees of freedom
## Multiple R-squared:  0.0002909,  Adjusted R-squared:  -0.006464 
## F-statistic: 0.04307 on 1 and 148 DF,  p-value: 0.8359
cor.test(bt_summer$recruit_est, bt_summer$weighted_mean_bt, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  bt_summer$recruit_est and bt_summer$weighted_mean_bt
## t = 0.20752, df = 148, p-value = 0.8359
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1435984  0.1768338
## sample estimates:
##        cor 
## 0.01705567
ggscatter(bt_summer, x = "recruit_est", y = "weighted_mean_bt", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean BT",
          title="Summer")

## Fall
lm_fall<-lm(weighted_mean_bt ~ recruit_est, data=bt_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = weighted_mean_bt ~ recruit_est, data = bt_fall)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.08506 -0.62604 -0.06512  0.61777  2.22327 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.219e+01  1.852e-01  65.819   <2e-16 ***
## recruit_est -5.380e-08  1.125e-07  -0.478    0.633    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9891 on 148 degrees of freedom
## Multiple R-squared:  0.001544,   Adjusted R-squared:  -0.005202 
## F-statistic: 0.2289 on 1 and 148 DF,  p-value: 0.6331
cor.test(bt_fall$recruit_est, bt_fall$weighted_mean_bt, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  bt_fall$recruit_est and bt_fall$weighted_mean_bt
## t = -0.47839, df = 148, p-value = 0.6331
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1983061  0.1217348
## sample estimates:
##        cor 
## -0.0392933
ggscatter(bt_fall, x = "recruit_est", y = "weighted_mean_bt", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Fall Weighted Mean BT",
          title="Fall")

Here is bottom temp and recruitment not lagged

df = bt.rec %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(bt = mean(weighted_mean_bt),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=bt, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(bt = mean(weighted_mean_bt),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=bt, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Here we have BT lagged by 1 year - by month (updated 12/19)

df %>% group_by(year, season) %>% 
  summarize(bt = mean(mean_bt_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=bt, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(bt = mean(mean_bt_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=bt, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Salinity

Here we explore salinity from the GLORYS reanalysis model at three different depths

  • 78 meters (relevant to larvae, recruits)
  • 92 meters (relevant recruits, juveniles)
  • 110 meters (relevant to juveniles, adults)
Salinity analysis
sal <- read.csv(here::here('data/salinity/sal_78m_monthly_ts_gtf.csv'))

# BT with year lag
sal.lag <- sal %>%
  mutate(mean_sal_lag1 = lag(weighted_mean_sal_78m,12), 
         year = as.numeric(year))
         
# Join with recruit estimate
sal.rec <- dplyr::full_join(recruit, sal.lag, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, weighted_mean_sal_78m, mean_sal_lag1) %>%
  tidyr::drop_na()
df = sal.rec %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(sal = mean(weighted_mean_sal_78m),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=sal, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(sal = mean(weighted_mean_sal_78m),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=sal, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

df %>% group_by(year, season) %>% 
  summarize(sal = mean(mean_sal_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=sal, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(sal = mean(mean_sal_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=sal, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

#### Computing correlations
win.cor = df %>% filter(season == 'winter') 
print(paste0('correlation coeff: ', round(cor(win.cor[,3], win.cor[,5]), 4)))
## [1] "correlation coeff: -0.4519"
fal.cor = df %>% filter(season == 'fall') 
print(paste0('correlation coeff: ', round(cor(fal.cor[,3], fal.cor[,4]), 4)))
## [1] "correlation coeff: 0.0713"

Currents

Cross-shelf processes may influence the retention or displacement of tilefish during early life history stages. These are explored below.

Shelf water volume

Shelf water volume: A measure of the volume of water bounded inshore of the shelf-slope front. In this analysis, shelf water is defined as all water having salinity <34 psu. The position of the shelf-slope front varies inter-annually with the higher shelf water values indicating the front being pushed further towards the shelf break.

high shv: front pushed towards sbf low shv: front pushed inshore (more slope water on shelf)

Hypothesis: Higher recruitment success correlated with years of higher shelf water volume in spring/summer. These months months may be particularly important as that is when spawning is occurring and the position of the sbf may influence the position of larvae (away from spawning grounds).

Additional variables in this dataset are shelf water temperature and salinity which may also be indicative of habitat conditions.

# Shelf water volume
shlfvol <- read.csv(here::here('data/shelf_water_volume/ShelfWaterVolume_BSB_update.csv'))

# wrangling date info, converting doy to date and month 
yrs <- as.vector(nrow(shlfvol))
shlfvol$Year <- as.character(shlfvol$Year)
for (i in 1:nrow(shlfvol)){
  yrs[i] <- strsplit(shlfvol$Year, ".", fixed = TRUE)[[i]][1]
}
shlfvol$year <- yrs

shlfvol <- shlfvol %>% mutate(date_= as.Date(Year.Day-1, 
                                             origin=paste0(year, "-01-01")), 
                              month= strftime(date_, "%m"), 
                              day=strftime(date_,"%d"), 
                              year = as.integer(year),
                              month = as.numeric(month))  


# Create shw vol by month w/lag
df.lag = shlfvol %>%
  group_by(year,month) %>% 
  summarise(mean_t = mean(ShW.T),
            mean_s = mean(ShW.S),
            mean_v = mean(ShW.Vol)) %>% 
  mutate(mean_t_lag2 = lag(mean_t,2),
         mean_t_lag3 = lag(mean_t,3), 
         mean_t_lag6 = lag(mean_t,6),
         mean_s_lag2 = lag(mean_s,2),
         mean_s_lag3 = lag(mean_s,3), 
         mean_s_lag6 = lag(mean_s,6),
         mean_v_lag2 = lag(mean_v,2),
         mean_v_lag3 = lag(mean_v,3), 
         mean_v_lag6 = lag(mean_v,6))
# Join in-situ bottom temps w/assessment recruitment estimate
df.join.shlf = dplyr::full_join(recruit, df.lag, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, mean_t, mean_s, mean_v, 
                mean_t_lag2, mean_s_lag2, mean_v_lag2,
                mean_t_lag3, mean_s_lag3, mean_v_lag3) %>%
  tidyr::drop_na()
# See what months have data
sort(unique(df.join.shlf$month))
## [1]  7  8  9 10 11
hist(df.join.shlf$month) # will group into spring/summer fall/winter categories

## Shelf water volume no lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_v)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water temperature no lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_t)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water salinity no lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_s)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

ggplot2::ggplot(df.join.shlf %>% filter(year >1997 & month %in% c(7,8,9)),
                aes(x=recruit_est, y=mean_s)) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = 'Linear')) + 
  geom_smooth(method = "lm", formula = y ~ x + I(x^2),
              size = 1, se = FALSE, aes(colour = 'Quadratic')) + 
  geom_smooth(method = "loess", formula = y ~ x, 
              size = 1, se = FALSE, aes(colour = 'Loess')) + 
  geom_smooth(method = "gam", formula = y ~  s(x), 
              size = 1, se = FALSE, aes(colour = 'Gam')) + 
  geom_smooth(method = "gam", formula = y ~ s(x, k = 3),
              size = 1, se = FALSE, aes(colour = 'Gam2')) +
  labs(title = 'Recruitment est x M.S.W July,Aug,Sept (1998:2020)') +
  scale_color_manual(name='Model',
                     breaks=c('Linear', 'Quadratic', 'Loess', 'Gam', 'Gam2'),
                     values=c('Linear'='black', 'Quadratic'='blue', 
                              'Loess'='red', 'Gam' = 'green',
                              'Gam2' = 'purple')) +
  theme_bw()

With lags

2 Months

## Shelf water volume 2 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_v_lag2)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume - lag 2 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water temperature  2 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_t_lag2)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature - lag 2 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water salinity 2 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_s_lag2)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity - lag 2 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

3 Months

## Shelf water volume 3 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_v_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume - lag 3 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water temperature 3 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_t_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature - lag 3 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water salinity 3 month lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_s_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity - lag 3 months') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

Annual

# Create shw vol by year w/lag
df.lag = shlfvol %>%
  group_by(year, month) %>% 
  summarise(mean_t = mean(ShW.T),
            mean_s = mean(ShW.S),
            mean_v = mean(ShW.Vol)) %>% 
  mutate(mean_t_lag2 = lag(mean_t,2),
         mean_t_lag3 = lag(mean_t,3), 
         mean_t_lag6 = lag(mean_t,6),
         mean_s_lag2 = lag(mean_s,2),
         mean_s_lag3 = lag(mean_s,3), 
         mean_s_lag6 = lag(mean_s,6),
         mean_v_lag2 = lag(mean_v,2),
         mean_v_lag3 = lag(mean_v,3), 
         mean_v_lag6 = lag(mean_v,6))
# Join in-situ bottom temps w/assessment recruitment estimate
df.join.shlf = dplyr::full_join(recruit, df.lag, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, mean_t, mean_s, mean_v, 
                mean_t_lag2, mean_s_lag2, mean_v_lag2,
                mean_t_lag3, mean_s_lag3, mean_v_lag3,
                mean_t_lag6, mean_s_lag6, mean_v_lag6) %>%
  tidyr::drop_na()

## Shelf water vol 

ggplot2::ggplot(df.join.shlf,
                aes(x=year, y=mean_v)) + 
  geom_point(color = 'black') +
  geom_line(color = 'black') +
  xlab('Year') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume') +
  theme_bw()

ggplot2::ggplot() + 
  geom_line(data = df.join.shlf, aes(x=year, y=mean_s), color = 'red') +
  geom_line(data = df.join.shlf,aes(x=year, y=mean_t*1), color = 'blue') +
  ylim(30.0,34.0) +
  scale_y_continuous(name = 'Sh.Water Salinity', 
                      sec.axis = sec_axis(~./1, name = 'Sh.Water Temperature')) + 
  xlab('Year') +
  labs(title = 'Shelf water salinity/temperature') +
  theme_bw()

ggplot2::ggplot() + 
  geom_line(data = df.join.shlf, aes(x=year, y=mean_s), color = 'red') +
  xlab('Year') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity') +
  theme_bw()

## Shelf water vol no lag

ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_v)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water temperature no lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_t)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water salinity no lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_s)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

With lags

## Shelf water vol 6 yr lag

ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_v_lag6)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume - lag 6 years') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water temperature 6 yr lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_t_lag6)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature - lag 6 years') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

## Shelf water salinity 6 yr lag
ggplot2::ggplot(df.join.shlf,
                aes(x=recruit_est, y=mean_s_lag6)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Recruitment estimate') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity - lag 6 years') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw()

print(paste0('correlation: ', round(cor(df.join.shlf[,2], df.join.shlf[,13]), 4)))
## [1] "correlation: NA"

Correlations

df.join.shlf = dplyr::full_join(recruit, df.lag, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, mean_t, mean_s, mean_v) %>%
  tidyr::drop_na() #removing lags to visualize all months
 
df.join.shlf = mutate(df.join.shlf, mean_v_lag1 = lag(mean_v,12)) #creating one year lag

#by season 
shlfvol_winter <- filter(df.join.shlf, month %in% c(1,2,3))
shlfvol_spring <- filter(df.join.shlf, month %in% c(4,5,6))
shlfvol_summer <- filter(df.join.shlf, month %in% c(7,8,9))
shlfvol_fall <- filter(df.join.shlf, month %in% c(10,11,12))

## shelf water volume - one year lag (summer = peak spawn)
lm_lag<-lm(mean_v_lag1 ~ recruit_est, data=shlfvol_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = mean_v_lag1 ~ recruit_est, data = shlfvol_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1459.95  -542.14   -91.96   353.47  1515.80 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.035e+03  2.214e+02  18.226   <2e-16 ***
## recruit_est 5.873e-05  1.383e-04   0.425    0.673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 745.8 on 53 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.003392,   Adjusted R-squared:  -0.01541 
## F-statistic: 0.1804 on 1 and 53 DF,  p-value: 0.6727
cor.test(shlfvol_summer$recruit_est, shlfvol_summer$mean_v_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_summer$recruit_est and shlfvol_summer$mean_v_lag1
## t = 0.42475, df = 53, p-value = 0.6727
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2103017  0.3186189
## sample estimates:
##        cor 
## 0.05824508
ggscatter(shlfvol_summer, x = "recruit_est", y = "mean_v_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water volume",
          title="One year lag")

## shelf water volume - winter
lm_winter<-lm(mean_v ~ recruit_est, data=shlfvol_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = mean_v ~ recruit_est, data = shlfvol_winter)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1825.94  -408.58   -48.77   589.87  1899.07 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.562e+03  2.530e+02  14.079   <2e-16 ***
## recruit_est 8.842e-05  1.564e-04   0.565    0.574    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 849.7 on 53 degrees of freedom
## Multiple R-squared:  0.005997,   Adjusted R-squared:  -0.01276 
## F-statistic: 0.3198 on 1 and 53 DF,  p-value: 0.5741
cor.test(shlfvol_winter$recruit_est, shlfvol_winter$mean_v, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_winter$recruit_est and shlfvol_winter$mean_v
## t = 0.56548, df = 53, p-value = 0.5741
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1917969  0.3358382
## sample estimates:
##        cor 
## 0.07744072
ggscatter(shlfvol_winter, x = "recruit_est", y = "mean_v", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water volume",
          title="Winter")

## shelf water volume - spring
lm_spring<-lm(mean_v ~ recruit_est, data=shlfvol_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = mean_v ~ recruit_est, data = shlfvol_spring)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1551.4  -217.0   106.6   371.5  1058.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.303e+03  2.474e+02   17.39   <2e-16 ***
## recruit_est -1.105e-04  1.492e-04   -0.74    0.465    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 655.8 on 29 degrees of freedom
## Multiple R-squared:  0.01856,    Adjusted R-squared:  -0.01529 
## F-statistic: 0.5483 on 1 and 29 DF,  p-value: 0.465
cor.test(shlfvol_spring$recruit_est, shlfvol_spring$mean_v, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_spring$recruit_est and shlfvol_spring$mean_v
## t = -0.74048, df = 29, p-value = 0.465
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4679736  0.2291804
## sample estimates:
##       cor 
## -0.136222
ggscatter(shlfvol_spring, x = "recruit_est", y = "mean_v", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water volume",
          title="spring")

## shelf water volume - summer
lm_summer<-lm(mean_v ~ recruit_est, data=shlfvol_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = mean_v ~ recruit_est, data = shlfvol_summer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1826.3  -517.7  -138.3   392.3  1634.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.287e+03  2.382e+02  17.995   <2e-16 ***
## recruit_est -1.007e-04  1.489e-04  -0.676    0.501    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 823.4 on 58 degrees of freedom
## Multiple R-squared:  0.007827,   Adjusted R-squared:  -0.00928 
## F-statistic: 0.4575 on 1 and 58 DF,  p-value: 0.5015
cor.test(shlfvol_summer$recruit_est, shlfvol_summer$mean_v, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_summer$recruit_est and shlfvol_summer$mean_v
## t = -0.67641, df = 58, p-value = 0.5015
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3348709  0.1692581
## sample estimates:
##         cor 
## -0.08846888
ggscatter(shlfvol_summer, x = "recruit_est", y = "mean_v", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water volume",
          title="Summer")

## shelf water volume - fall
lm_fall<-lm(mean_v ~ recruit_est, data=shlfvol_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = mean_v ~ recruit_est, data = shlfvol_fall)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1503.4  -637.8  -157.9   701.3  1399.4 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.417e+03  3.567e+02   9.579 5.15e-10 ***
## recruit_est 4.819e-05  2.024e-04   0.238    0.814    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 874.2 on 26 degrees of freedom
## Multiple R-squared:  0.002176,   Adjusted R-squared:  -0.0362 
## F-statistic: 0.0567 on 1 and 26 DF,  p-value: 0.8137
cor.test(shlfvol_fall$recruit_est, shlfvol_fall$mean_v, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_fall$recruit_est and shlfvol_fall$mean_v
## t = 0.23811, df = 26, p-value = 0.8137
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3322114  0.4125444
## sample estimates:
##      cor 
## 0.046647
ggscatter(shlfvol_fall, x = "recruit_est", y = "mean_v", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water volume",
          title="Fall")

## shelf water temperature
lm_shlftemp<-lm(mean_t ~ recruit_est, data=df.join.shlf)
summary(lm_shlftemp)
## 
## Call:
## lm(formula = mean_t ~ recruit_est, data = df.join.shlf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8944 -4.4381  0.5717  3.8118  7.3266 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.093e+01  7.017e-01  15.580   <2e-16 ***
## recruit_est 4.946e-08  4.271e-07   0.116    0.908    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.237 on 172 degrees of freedom
## Multiple R-squared:  7.797e-05,  Adjusted R-squared:  -0.005736 
## F-statistic: 0.01341 on 1 and 172 DF,  p-value: 0.9079
cor.test(df.join.shlf$recruit_est, df.join.shlf$mean_t, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  df.join.shlf$recruit_est and df.join.shlf$mean_t
## t = 0.11581, df = 172, p-value = 0.9079
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1401238  0.1573931
## sample estimates:
##         cor 
## 0.008830066
ggscatter(df.join.shlf, x = "recruit_est", y = "mean_t", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water temperature")

## shelf water salinity
lm_shlfsal<-lm(mean_s ~ recruit_est, data=df.join.shlf)
summary(lm_shlfsal)
## 
## Call:
## lm(formula = mean_s ~ recruit_est, data = df.join.shlf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9007 -0.1919  0.0226  0.2090  0.6810 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.284e+01  5.441e-02 603.562   <2e-16 ***
## recruit_est -1.769e-08  3.311e-08  -0.534    0.594    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3286 on 172 degrees of freedom
## Multiple R-squared:  0.001657,   Adjusted R-squared:  -0.004147 
## F-statistic: 0.2855 on 1 and 172 DF,  p-value: 0.5938
cor.test(df.join.shlf$recruit_est, df.join.shlf$mean_s, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  df.join.shlf$recruit_est and df.join.shlf$mean_s
## t = -0.53436, df = 172, p-value = 0.5938
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1883400  0.1087174
## sample estimates:
##         cor 
## -0.04071088
ggscatter(df.join.shlf, x = "recruit_est", y = "mean_s", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean shelf water salinity")

Updated plots - SWV no lag 12/19

df = df.join.shlf %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(v = mean_v,
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=v, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(v = mean(mean_v),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=v, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Updated plots - SWV with lag 12/19

df %>% group_by(year, season) %>% 
  summarize(v = mean(mean_v_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=v, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(v = mean(mean_v_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=v, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Gulf-stream index

Gulf stream index was calculated based on method presented by Pérez-Hernández and Joyce (2014). The gulf stream index (GSI) is a measure of the degrees latitude above the average Gulf Stream position based on ocean temperature at 200m (15 C) depth between 55W to 75W.

Positive values indicate a the mean position of the GS is more Northernly, whereas negative values indicate a more Southernly position.

# positive are more Northerly, negative are more southernly 

## --- Note below is the original data source, which was modified to include 
## --- month and then saved as a new file (mm_gsi_1954_2022_chen.csv).
# gsi.a <- ecodata::gsi
# gsi.m <- read.csv(here::here('data/gulf_stream_index/Chen_EN4_T200_GSI_1954_2022_monthly - Zhuomin Chen.xlsx - Sheet1.csv'))

# # weird inefficient solution I came up with to get month values (but it works)
# gsi.m$year <- as.numeric(gsub("(^\\d{4}).*", "\\1", gsi.m$Month))
# gsi.m$m.1 <- round(str_extract(gsi.m$Month, '\\d+([.,]\\d+)?') %>% as.numeric()- gsi.m$year,2)
# gsi.m$month <- round(as.numeric(str_extract(gsi.m$m.1, '\\d\\d')))
# is.na(gsi.m$month) <- 10
# gsi.m$month <- replace(gsi.m$month, is.na(gsi.m$month), 10)
# gsi.m <- gsi.m[,-4]
# # saving so don't have to do that everytime:
# write.csv(gsi.m, 'mm_gsi_1954_2022_chen.csv') #


gsi.m <- read.csv(here::here('data/gulf_stream_index/mm_gsi_1954_2022_chen.csv'))

# df <- dplyr::full_join(recruit, gsi.m, by = join_by(year)) %>%
#   dplyr::select(year, month, recruit_est, GSI) %>%
#   filter(month %in% c(3:9)) %>% 
#   tidyr::pivot_wider(names_from = c(month),
#                      values_from = c(GSI)) %>% 
#   rlang::set_names(c('year','recruit_est','Mar', 'Apr', 'May',                                                                   'Jun', 'Jul', 'Aug', 'Sep')) %>% 
#   tidyr::drop_na()


## --- This joins recruit estimate data to GSI data and 
## --- calculates some summary stats(mean, sd, min, max)
df <- dplyr::full_join(recruit, gsi.m %>%
                   group_by(year) %>% 
                   filter(month %in% c(3:8)) %>% 
                   summarise(m.gsi = mean(GSI),
                             sd.gsi = sd(GSI),
                             max.gsi = max(GSI), 
                             min.gsi = min(GSI)), 
                 by = join_by(year)) %>% 
  mutate(pos = ifelse(m.gsi > 0, 'Northerly', 'Southerly'), 
         n.pos = ifelse(m.gsi > 0, 1, 0))
df <- df[-c(51:69),] # this removes rows w/years that don't match 
# (could/should do this before hand in more standardized way - will do later)

# Plot the time series (mean GSI for the months of interest)
# -- Note: here I am looking just at March through August assuming these
# months are the most important to recently spawned individuals 
ggplot(data = df, 
       aes(x = year, y = m.gsi)) +
  geom_line(lwd = 1) +
  geom_hline(yintercept = 0, lty = 2) +
  labs(title = 'Mean GS position anomaly March:August',
       x = 'Year', 
       y = "Gulf stream position anomaly\n") +
theme_bw() 

# dplyr::full_join(recruit, gsi.m, by = join_by(year)) %>%
#   dplyr::select(year, month, recruit_est, GSI) %>%
#   #filter(month %in% c(3:9)) %>%  
#   filter(year>1997) %>% 
#   tidyr::drop_na() %>% 
#   ggplot2::ggplot(.,aes(x=recruit_est, y=GSI)) + 
#   geom_point(color = 'black') +
#   xlab('Recruitment estimate') +
#   ylab('Gulf stream position anomaly') +
#   labs(title = 'Gulf stream index') +
#   geom_hline(yintercept = 0, lty = 2) +
#   theme_bw()


# Looking across all months 
dplyr::full_join(recruit, gsi.m, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, GSI) %>%
  #filter(month %in% c(3:9)) %>%  
  filter(year>1997) %>% 
  tidyr::drop_na() %>% 
ggplot2::ggplot(.,aes(x=recruit_est, y=GSI)) + 
  geom_point(color = 'black') +
  facet_wrap(~month)+
  xlab('Recruitment estimate') +
  ylab('Gulf stream position anomaly') +
  labs(title = 'Gulf stream index by month') +
  geom_hline(yintercept = 0, lty = 2) +
  theme_bw()

# Looking across April through July
dplyr::full_join(recruit, gsi.m, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, GSI) %>%
  #filter(month %in% c(3:9)) %>%  
  filter(year>1997 & month %in% c(4:7)) %>% 
  tidyr::drop_na() %>% 
  ggplot2::ggplot(.,aes(x=recruit_est, y=GSI)) + 
  geom_point(color = 'black') +
  xlab('Recruitment estimate') +
  ylab('Gulf stream position anomaly') +
  labs(title = 'Gulf stream index (Apr:Jul)') +
  geom_hline(yintercept = 0, lty = 2) +
  theme_bw()

Correlations

df.gsi = dplyr::full_join(recruit, gsi.m, by = join_by(year)) %>%
  dplyr::select(year, month, recruit_est, GSI) %>%
  mutate(gsi_lag1 = lag(GSI,12)) %>%
  filter(year>1997) %>% 
  tidyr::drop_na() #GSI by month and with year lag

#by season
gsi_winter <- filter(df.gsi, month %in% c(1,2,3))
gsi_spring <- filter(df.gsi, month %in% c(4,5,6))
gsi_summer <- filter(df.gsi, month %in% c(7,8,9))
gsi_fall <- filter(df.gsi, month %in% c(10,11,12))

## GSI - one year lag
lm_lag<-lm(gsi_lag1 ~ recruit_est, data=gsi_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = gsi_lag1 ~ recruit_est, data = gsi_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25835 -0.53882 -0.09034  0.50124  1.57858 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  5.700e-01  1.918e-01   2.972   0.0041 **
## recruit_est -4.877e-08  1.182e-07  -0.413   0.6812   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7602 on 67 degrees of freedom
## Multiple R-squared:  0.002536,   Adjusted R-squared:  -0.01235 
## F-statistic: 0.1703 on 1 and 67 DF,  p-value: 0.6812
cor.test(gsi_summer$recruit_est, gsi_summer$gsi_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_summer$recruit_est and gsi_summer$gsi_lag1
## t = -0.41269, df = 67, p-value = 0.6812
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2836542  0.1885740
## sample estimates:
##         cor 
## -0.05035404
ggscatter(gsi_summer, x = "recruit_est", y = "gsi_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Gulf Stream Position Anomaly",
          title="One year lag")

## GSI - winter
lm_winter<-lm(GSI ~ recruit_est, data=gsi_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = GSI ~ recruit_est, data = gsi_winter)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.16418 -0.68857  0.00435  0.78957  1.61442 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 3.965e-01  2.241e-01   1.769   0.0814 .
## recruit_est 1.343e-07  1.381e-07   0.972   0.3345  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8886 on 67 degrees of freedom
## Multiple R-squared:  0.01391,    Adjusted R-squared:  -0.0008119 
## F-statistic: 0.9448 on 1 and 67 DF,  p-value: 0.3345
cor.test(gsi_winter$recruit_est, gsi_winter$GSI, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_winter$recruit_est and gsi_winter$GSI
## t = 0.97203, df = 67, p-value = 0.3345
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1221670  0.3449758
## sample estimates:
##       cor 
## 0.1179234
ggscatter(gsi_winter, x = "recruit_est", y = "GSI", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Gulf Stream Position Anomaly",
          title="Winter")

## GSI - spring
lm_spring<-lm(GSI ~ recruit_est, data=gsi_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = GSI ~ recruit_est, data = gsi_spring)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8868 -0.3629 -0.0042  0.4637  1.2688 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 3.253e-01  1.845e-01   1.764   0.0824 .
## recruit_est 1.557e-07  1.137e-07   1.370   0.1754  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7313 on 67 degrees of freedom
## Multiple R-squared:  0.02723,    Adjusted R-squared:  0.01271 
## F-statistic: 1.876 on 1 and 67 DF,  p-value: 0.1754
cor.test(gsi_spring$recruit_est, gsi_spring$GSI, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_spring$recruit_est and gsi_spring$GSI
## t = 1.3695, df = 67, p-value = 0.1754
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07457117  0.38660302
## sample estimates:
##       cor 
## 0.1650221
ggscatter(gsi_spring, x = "recruit_est", y = "GSI", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Gulf Stream Position Anomaly",
          title="spring")

## GSI - summer
lm_summer<-lm(GSI ~ recruit_est, data=gsi_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = GSI ~ recruit_est, data = gsi_summer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3212 -0.5852 -0.1173  0.4797  1.5880 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 4.918e-01  1.906e-01   2.581   0.0121 *
## recruit_est 4.398e-08  1.174e-07   0.374   0.7092  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7556 on 67 degrees of freedom
## Multiple R-squared:  0.002089,   Adjusted R-squared:  -0.01281 
## F-statistic: 0.1402 on 1 and 67 DF,  p-value: 0.7092
cor.test(gsi_summer$recruit_est, gsi_summer$GSI, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_summer$recruit_est and gsi_summer$GSI
## t = 0.37448, df = 67, p-value = 0.7092
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1930668  0.2793612
## sample estimates:
##        cor 
## 0.04570227
ggscatter(gsi_summer, x = "recruit_est", y = "GSI", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Gulf Stream Position Anomaly",
          title="summer")

## GSI - fall
lm_fall<-lm(GSI ~ recruit_est, data=gsi_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = GSI ~ recruit_est, data = gsi_fall)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0182 -0.6567  0.1503  0.5979  1.5617 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 5.572e-01  2.055e-01   2.712  0.00849 **
## recruit_est 4.091e-08  1.266e-07   0.323  0.74762   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8146 on 67 degrees of freedom
## Multiple R-squared:  0.001556,   Adjusted R-squared:  -0.01335 
## F-statistic: 0.1044 on 1 and 67 DF,  p-value: 0.7476
cor.test(gsi_fall$recruit_est, gsi_fall$GSI, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_fall$recruit_est and gsi_fall$GSI
## t = 0.3231, df = 67, p-value = 0.7476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1990968  0.2735694
## sample estimates:
##        cor 
## 0.03944253
ggscatter(gsi_fall, x = "recruit_est", y = "GSI", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Gulf Stream Position Anomaly",
          title="Fall")

Should I add a lag? If so, how much? From Nesslage_ea_21: For the Northern stock- From the original 49 explanatory variables, the final RF model included 10 variables: annual AMO (lagged 5–7 years); December to April AMO (lagged 5–7 years); station-based December to February NAO (lagged 3 and 4 years); PC-based December to February NAO (lagged 4 years), and management time block (Table 1; Figure S3). The final GAMM based on backward selection of variables in- cluded December to April AMO lagged 7 years and station-based December to February NAO lagged 3 and 4 years (Table 1). The shapes of the relationships approximated by the RF and GAMM indi- cate that golden tilefish landings were higher during negative AMO and positive NAO, with their respective lags (Figures 2 and 3). The largest range of the smoothed term on the y-axis corresponded with December to April AMO lagged 7 years (Figures 2 and 3) and this co- variate contributed 52.5% of the GAM R2 (Figure 4), implying AMO has the largest influence on northern landings. In contrast, NAO co-variates at lags of 3 and 4 years contributed a combined 47.5% of the GAM R2 (Figure 4).

The random forest using the full dataset identified 62 significant variables from the original 121 explanatory variables (Table 1), including two versions of the AMO (annual and seasonal with each lagged 0–7 years), both seasonal versions of PC-based NAO (each lagged 0–7 years), Labrador Current transport indices (NE Track 191: lagged 0, 4, 9, and 10 quarters), Gulf Stream index of position anomalies (lagged 0, 1, 4–10, and 12 quarters), Gulf stream position indices (lagged 0–3 years), Gulf stream transport index (lagged 0–3 years), bottom temperature anomalies (lagged 0–2, 4–7 years), and time block (Figure S1). The final GAMM for northern CPUE in- cluded four variables: annual AMO lagged 6 years, December to April AMO lagged 7 years, Gulf Stream index of position anomalies lagged 12 quarters, and the Labrador Current transport index for NE Track 191 unlagged (Table 1; Figure 5). Annual AMO lagged 6 years and December to April AMO lagged 7 years contributed a combined 64.1% of the GAM R2(Figure 4). Gulf Stream and Labrador Current transport indices contributed only 19.7% and 16.2%, respectively, of the GAM R2(Figure 4).

# recruitment index across all years 
dplyr::full_join(recruit, gsi.m %>%
                     group_by(year) %>% 
                     filter(month %in% c(3:8)) %>% 
                     summarise(m.gsi = mean(GSI)), 
                   by = join_by(year)) %>%
  ggplot2::ggplot(., aes(x=recruit_est, y=m.gsi)) + 
  geom_point(color = 'black') +
  geom_hline(yintercept = 0, lty = 2)+
  labs(title = 'All years') +
  xlab('Recruitment estimate') +
  ylab('Gulf stream position anomaly') +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = 'Linear')) + 
  geom_smooth(method = "lm", formula = y ~ x + I(x^2),
              size = 1, se = FALSE, aes(colour = 'Quadratic')) + 
  geom_smooth(method = "loess", formula = y ~ x, 
              size = 1, se = FALSE, aes(colour = 'Loess')) + 
  geom_smooth(method = "gam", formula = y ~  s(x), 
              size = 1, se = FALSE, aes(colour = 'Gam')) + 
  geom_smooth(method = "gam", formula = y ~ s(x, k = 3),
              size = 1, se = FALSE, aes(colour = 'Gam2')) +
  scale_color_manual(name='Model',
                     breaks=c('Linear', 'Quadratic', 'Loess', 'Gam', 'Gam2'),
                     values=c('Linear'='black', 'Quadratic'='blue', 
                              'Loess'='red', 'Gam' = 'green',
                              'Gam2' = 'purple')) +
  theme_bw()

# recruitment index across just the years that Paul recommended + el nino 1998  

tt = dplyr::full_join(recruit, gsi.m %>%
                     group_by(year) %>% 
                     filter(month %in% c(3:8)) %>% 
                     summarise(m.gsi = mean(GSI)), 
                   by = join_by(year)) 
  ggplot2::ggplot(tt %>% filter(year>1997), aes(x=recruit_est, y=m.gsi)) + 
  geom_point(color = 'black') +
  geom_hline(yintercept = 0, lty = 2)+
  labs(title = '1998:2020, No outliers') +
  xlab('Recruitment estimate') +
  ylab('Gulf stream position anomaly') +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = 'Linear')) + 
  geom_smooth(method = "lm", formula = y ~ x + I(x^2),
              size = 1, se = FALSE, aes(colour = 'Quadratic')) + 
  geom_smooth(method = "loess", formula = y ~ x, 
              size = 1, se = FALSE, aes(colour = 'Loess')) + 
  geom_smooth(method = "gam", formula = y ~  s(x), 
              size = 1, se = FALSE, aes(colour = 'Gam')) + 
  geom_smooth(method = "gam", formula = y ~ s(x, k = 3),
              size = 1, se = FALSE, aes(colour = 'Gam2')) +
  scale_color_manual(name='Model',
                     breaks=c('Linear', 'Quadratic', 'Loess', 'Gam', 'Gam2'),
                     values=c('Linear'='black', 'Quadratic'='blue', 
                              'Loess'='red', 'Gam' = 'green',
                              'Gam2' = 'purple')) +
  theme_bw()

ggplot(data = df, # add the data
       aes(x = year, y = m.gsi, # set x, y coordinates
           color = pos)) +    # color by GS position
  geom_boxplot() +
  facet_grid(~pos) + # create panes base on GS position
  ecodata::theme_facet()

# this gives main effects AND interactions
pos_aov <- aov(recruit_est ~ year * pos,
              data = df)

# look at effects and interactions
# summary(pos_aov)
tidy_pos_aov <- broom::tidy(pos_aov)
tidy_pos_aov
## # A tibble: 4 × 6
##   term         df   sumsq        meansq statistic p.value
##   <chr>     <dbl>   <dbl>         <dbl>     <dbl>   <dbl>
## 1 year          1 3.85e11 384693730334.     0.717   0.401
## 2 pos           1 5.37e10  53734167452.     0.100   0.753
## 3 year:pos      1 6.82e11 682304704182.     1.27    0.265
## 4 Residuals    46 2.47e13 536176435236.    NA      NA
# write.csv(tidy_pos_aov, 'gs_pos_aov.csv')



ggplot(data = df, 
             aes(x = recruit_est/1000, y = m.gsi, fill = pos,  
                 group = year)) +
   geom_bar(color = "black", stat = "identity", 
            position = position_dodge2(preserve = "single"), width = 20) +
  theme_bw() +
  labs(title = 'All years',
       x = "\nRecruitment estimate (x1000)", 
       y = "Gulf stream position anomaly\n")

ggplot(data = df %>% filter(year <= 1999), 
       aes(x = recruit_est/1000, y = m.gsi, fill = pos,  
           group = year)) +
  geom_bar(color = "black", stat = "identity", 
           position = position_dodge2(preserve = "single"), width = 40) +
  theme_bw() +
  labs(title = '1971:1999',
       x = "\nRecruitment estimate (x1000)", 
       y = "Gulf stream position anomaly\n")

ggplot(data = df %>% filter(year >1999), 
       aes(x = recruit_est/1000, y = m.gsi, fill = pos,  
           group = year)) +
  geom_bar(color = "black", stat = "identity", 
           position = position_dodge2(preserve = "single"), width = 40) +
  theme_bw() +
  labs(title = '2000:2020',
       x = "\nRecruitment estimate (x1000)", 
       y = "Gulf stream position anomaly\n")

Updated GSI No Lag Plots - 12/19

df = df.gsi %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(gsi = GSI,
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=gsi, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(gsi = mean(GSI),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=gsi, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Updated GSI 1 year lag - 12/19

df %>% group_by(year, season) %>% 
  summarize(gsi = mean(gsi_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=gsi, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(gsi = mean(gsi_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=gsi, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Food availablity

Larval tilefish eat zooplankton, likely calanus. Calanus finmarchicus are a copepod (crustacean) with a one-year life cycle and are an important food source for many commercially important species. Calanus spp. are lipid rich, herbivorous species that eat phytoplankton, diatoms in particular (Hobbs et al. 2020).

Diatoms are often represented as microplankton (>20 µm), but many species are of the nanoplankton size class (2-20 µm), and a smaller few may even overlap with picoplanton size class (<2 µm).

Calanus

Calanus is not as common in MAB, need to figure out dominant zooplankton in MAB.

# Calanus
calanus <- ecodata::calanus_stage %>% filter(Time %in% c(1998:2021))%>% 
    rename_all(., .funs = tolower) %>% 
   mutate(year = time)


ggplot() +
  geom_line(data = calanus %>% filter(epu == 'GB', 
                                  var == 'adt-Spring'), 
       aes(x = year , y = value, col = epu), lwd = 1) + 
  geom_line(data = calanus %>% filter(epu == 'MAB', 
                                  var == 'adt-Spring'), 
       aes(x = year , y = value, col = epu), lwd = 1) +
  labs(color = c('EPU')) +
  theme_minimal()

Georges Bank

# GB
c5.gb <- calanus %>% filter(epu == 'GB', var == 'c5-Spring')
adult.gb <- calanus %>% filter(epu == 'GB', var == 'adt-Spring' )

df.c5 <- dplyr::full_join(recruit, c5.gb, by = join_by(year)) %>%
  dplyr::select(year, recruit_est, value) %>%
  tidyr::drop_na()
df.adt <- dplyr::full_join(recruit, adult.gb, by = join_by(year)) %>%
  dplyr::select(year, recruit_est, value) %>%
  tidyr::drop_na()

# Regression
ggscatter(df.c5, x = 'recruit_est', y = 'value', 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Recruitment estimate", 
          ylab = "Calanus c5 spring (No. per 100m^-3)",
           title = 'c5')  

ggscatter(df.adt, x = 'recruit_est', y = 'value', 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Recruitment estimate", 
          ylab = "Calanus adult spring (No. per 100m^-3)",
           title = 'Adult')  

# GLM 
eqn <- as.formula(paste('recruit_est ~', paste(colnames(df.c5)[1], 
                                               collapse = " + ")))

mod0 <- glm(recruit_est ~ 1, 
            data = df.c5, 
            family = "poisson")

mod1 <- glm(eqn, 
            data = df.c5, 
            family = "poisson")

summary(mod0)
## 
## Call:
## glm(formula = recruit_est ~ 1, family = "poisson", data = df.c5)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 1.420e+01  1.802e-04   78775   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 8933474  on 20  degrees of freedom
## Residual deviance: 8933474  on 20  degrees of freedom
## AIC: 8933809
## 
## Number of Fisher Scoring iterations: 4
summary(mod1)
## 
## Call:
## glm(formula = eqn, family = "poisson", data = df.c5)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.350e+01  5.805e-02   749.4   <2e-16 ***
## year        -1.460e-02  2.891e-05  -504.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 8933474  on 20  degrees of freedom
## Residual deviance: 8677368  on 19  degrees of freedom
## AIC: 8677705
## 
## Number of Fisher Scoring iterations: 4
AIC(mod0, mod1) %>% dplyr::arrange(AIC)
##      df     AIC
## mod1  2 8677705
## mod0  1 8933809
null_prediction <- exp(predict(mod0))
mod_prediction <- exp(predict(mod1))


plot(df.c5$year, df.c5$recruit_est, type = 'l')
lines(df.c5$year, null_prediction, col = "gray")
lines(df.c5$year, mod_prediction, col = "red")

Mid-atlantic

# Mid-Atlantic Bight
c5.mab <- calanus %>% filter(epu == 'MAB', var == 'c5-Spring')
adult.mab <- calanus %>% filter(epu == 'MAB', var == 'adt-Spring' )

df.c5 <- dplyr::full_join(recruit, c5.mab, by = join_by(year)) %>%
  dplyr::select(year, recruit_est, value) %>%
  tidyr::drop_na()
df.adt <- dplyr::full_join(recruit, adult.mab, by = join_by(year)) %>%
  dplyr::select(year, recruit_est, value) %>%
  tidyr::drop_na()

# Regression
ggscatter(df.c5, x = 'recruit_est', y = 'value', 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Recruitment estimate", 
          ylab = "Calanus c5 spring (No. per 100m^-3)",
           title = 'c5')  

ggscatter(df.adt, x = 'recruit_est', y = 'value', 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Recruitment estimate", 
          ylab = "Calanus adult spring (No. per 100m^-3)",
           title = 'Adult')  

Microplankton
micro<-read.csv(here::here('data/phyto_size_class/microplankton_ts_gtf_strata.csv'))
Microplankton maps
# microplankton
Microplankton analysis
# Microplankton

# micro with year lag
micro.lag <- micro %>%
  mutate(micro_lag1 = lag(weighted_mean_micro,12))
         
# Join with recruit estimate
micro.rec <- dplyr::full_join(recruit, micro.lag %>%
                   group_by(year) %>% 
                   filter(year %in% c(1998:2020)),
dplyr::select(year, month, mean_cpue, mean_micro, weighted_mean_micro, micro_lag1),
                 by = join_by(year))
micro.rec <- micro.rec[-c(1:27),] #removes year < 1998 (starting year for analysis)

#by season
micro_winter <- filter(micro.rec, month %in% c(1,2,3))
micro_winter <- micro_winter[-c(18),] # removes outlier

micro_spring <- filter(micro.rec, month %in% c(4,5,6))
micro_spring <- micro_spring[-c(7),]

micro_summer <- filter(micro.rec, month %in% c(7,8,9))
micro_summer <- micro_summer[-c(17),] #removes outlier

micro_fall <- filter(micro.rec, month %in% c(10,11,12))
micro_fall <- micro_fall[-c(62),] #removes outlier

## One year lag - Summer (peak spawn)
lm_lag<-lm(micro_lag1 ~ recruit_est, data=micro_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = micro_lag1 ~ recruit_est, data = micro_summer)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059018 -0.024279 -0.004239  0.013179  0.136674 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.102e-01  9.156e-03   12.03   <2e-16 ***
## recruit_est -4.544e-09  5.613e-09   -0.81    0.421    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03558 on 63 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.0103, Adjusted R-squared:  -0.005414 
## F-statistic: 0.6554 on 1 and 63 DF,  p-value: 0.4212
cor.test(micro_summer$recruit_est, micro_summer$micro_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro_summer$recruit_est and micro_summer$micro_lag1
## t = -0.80954, df = 63, p-value = 0.4212
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3370252  0.1460467
## sample estimates:
##        cor 
## -0.1014667
ggscatter(micro_summer, x = "recruit_est", y = "micro_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean Microplankton",
          title="One Year Lag")

## Winter
lm_winter<-lm(weighted_mean_micro ~ recruit_est, data=micro_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = weighted_mean_micro ~ recruit_est, data = micro_winter)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15902 -0.05443 -0.02121  0.04641  0.24403 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.126e-01  2.038e-02  15.337   <2e-16 ***
## recruit_est 1.945e-08  1.247e-08   1.559    0.124    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07927 on 66 degrees of freedom
## Multiple R-squared:  0.03552,    Adjusted R-squared:  0.02091 
## F-statistic: 2.431 on 1 and 66 DF,  p-value: 0.1238
cor.test(micro_winter$recruit_est, micro_winter$weighted_mean_micro, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro_winter$recruit_est and micro_winter$weighted_mean_micro
## t = 1.5591, df = 66, p-value = 0.1238
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05230153  0.40854039
## sample estimates:
##       cor 
## 0.1884738
ggscatter(micro_winter, x = "recruit_est", y = "weighted_mean_micro", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean Microplankton",
          title="Winter")

## Spring
lm_spring<-lm(weighted_mean_micro ~ recruit_est, data=micro_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = weighted_mean_micro ~ recruit_est, data = micro_spring)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23260 -0.14253 -0.02222  0.12166  0.36330 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.709e-01  3.992e-02   9.289 1.34e-13 ***
## recruit_est -2.326e-08  2.501e-08  -0.930    0.356    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1565 on 66 degrees of freedom
## Multiple R-squared:  0.01293,    Adjusted R-squared:  -0.002025 
## F-statistic: 0.8646 on 1 and 66 DF,  p-value: 0.3559
cor.test(micro_spring$recruit_est, micro_spring$weighted_mean_micro, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro_spring$recruit_est and micro_spring$weighted_mean_micro
## t = -0.92982, df = 66, p-value = 0.3559
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3428412  0.1281894
## sample estimates:
##        cor 
## -0.1137111
ggscatter(micro_spring, x = "recruit_est", y = "weighted_mean_micro", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Spring Weighted Mean Microplankton",
          title="Spring")

## Summer
lm_summer<-lm(weighted_mean_micro ~ recruit_est, data=micro_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = weighted_mean_micro ~ recruit_est, data = micro_summer)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.049815 -0.020294 -0.005428  0.014549  0.091153 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.043e-01  7.884e-03  13.224   <2e-16 ***
## recruit_est -2.676e-09  4.825e-09  -0.555    0.581    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03066 on 66 degrees of freedom
## Multiple R-squared:  0.004638,   Adjusted R-squared:  -0.01044 
## F-statistic: 0.3075 on 1 and 66 DF,  p-value: 0.5811
cor.test(micro_summer$recruit_est, micro_summer$weighted_mean_micro, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro_summer$recruit_est and micro_summer$weighted_mean_micro
## t = -0.55455, df = 66, p-value = 0.5811
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3016296  0.1731342
## sample estimates:
##         cor 
## -0.06810209
ggscatter(micro_summer, x = "recruit_est", y = "weighted_mean_micro", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean Microplankton",
          title="Summer")

## Fall
lm_fall<-lm(weighted_mean_micro ~ recruit_est, data=micro_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = weighted_mean_micro ~ recruit_est, data = micro_fall)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22693 -0.10823 -0.00688  0.06245  0.48119 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.727e-01  3.558e-02   7.665 1.05e-10 ***
## recruit_est 2.725e-08  2.203e-08   1.237     0.22    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.141 on 66 degrees of freedom
## Multiple R-squared:  0.02267,    Adjusted R-squared:  0.007858 
## F-statistic: 1.531 on 1 and 66 DF,  p-value: 0.2204
cor.test(micro_fall$recruit_est, micro_fall$weighted_mean_micro, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro_fall$recruit_est and micro_fall$weighted_mean_micro
## t = 1.2372, df = 66, p-value = 0.2204
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09114366  0.37549939
## sample estimates:
##       cor 
## 0.1505531
ggscatter(micro_fall, x = "recruit_est", y = "weighted_mean_micro", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Fall Weighted Mean Microplankton",
          title="Fall")

Updated plots Micro. no lag - 12/19

df = micro.rec %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(micro = weighted_mean_micro,
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=micro, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(micro = weighted_mean_micro,
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=micro, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Updated plots Micro. 1 year lag - 12/19

df %>% group_by(year, season) %>% 
  summarize(micro = mean(micro_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=micro, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(micro = mean(micro_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=micro, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Chlorophyll-A
# CHL-A
chl<-read.csv(here::here('data/chl/chl_ts_gtf_strata.csv'))

# chl with year lag
chl.lag <- chl %>%
  mutate(chl_lag1 = lag(weighted_mean_chl,12))
         
# Join with recruit estimate
chl.rec <- dplyr::full_join(recruit, chl.lag %>%
                   group_by(year) %>% 
                   filter(year %in% c(1998:2020)),
dplyr::select(year, month, mean_cpue, mean_chl, weighted_mean_chl, chl_lag1),
                 by = join_by(year))
chl.rec <- chl.rec[-c(1:27),] #removes year < 1998 (starting year for analysis)
Maps (Chlorophyll-A)
# CHL-A
Chlorophyll-A analysis
# CHL-A
#by season
chl_winter <- filter(chl.rec, month %in% c(1,2,3))
chl_winter <- chl_winter[-c(18),] # removes outlier

chl_spring <- filter(chl.rec, month %in% c(4,5,6))
#chl_spring <- chl_spring[-c(7),]

chl_summer <- filter(chl.rec, month %in% c(7,8,9))
#chl_summer <- chl_summer[-c(17),] #removes outlier

chl_fall <- filter(chl.rec, month %in% c(10,11,12))
chl_fall <- chl_fall[-c(62),] #removes outlier

## One year lag - Summer (peak spawn)
lm_lag<-lm(chl_lag1 ~ recruit_est, data=chl_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = chl_lag1 ~ recruit_est, data = chl_summer)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.173365 -0.064682 -0.004704  0.051986  0.271157 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.769e-01  2.297e-02  16.407   <2e-16 ***
## recruit_est -1.002e-08  1.418e-08  -0.707    0.482    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09099 on 64 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.007747,   Adjusted R-squared:  -0.007756 
## F-statistic: 0.4997 on 1 and 64 DF,  p-value: 0.4822
cor.test(chl_summer$recruit_est, chl_summer$chl_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl_summer$recruit_est and chl_summer$chl_lag1
## t = -0.7069, df = 64, p-value = 0.4822
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3231677  0.1573656
## sample estimates:
##        cor 
## -0.0880196
ggscatter(chl_summer, x = "recruit_est", y = "chl_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean Chlorophyll",
          title="One Year Lag")

## Winter
lm_winter<-lm(weighted_mean_chl ~ recruit_est, data=chl_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = weighted_mean_chl ~ recruit_est, data = chl_winter)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141707 -0.073832 -0.008945  0.063666  0.284785 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.442e-01  2.444e-02  26.361   <2e-16 ***
## recruit_est 1.345e-08  1.496e-08   0.899    0.372    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09505 on 66 degrees of freedom
## Multiple R-squared:  0.0121, Adjusted R-squared:  -0.002866 
## F-statistic: 0.8085 on 1 and 66 DF,  p-value: 0.3718
cor.test(chl_winter$recruit_est, chl_winter$weighted_mean_chl, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl_winter$recruit_est and chl_winter$weighted_mean_chl
## t = 0.89917, df = 66, p-value = 0.3718
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1318753  0.3395284
## sample estimates:
##       cor 
## 0.1100087
ggscatter(chl_winter, x = "recruit_est", y = "weighted_mean_chl", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean Chlorophyll",
          title="Winter")

## Spring
lm_spring<-lm(weighted_mean_chl ~ recruit_est, data=chl_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = weighted_mean_chl ~ recruit_est, data = chl_spring)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36925 -0.14812 -0.00048  0.13185  0.39266 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.248e-01  4.660e-02  15.555   <2e-16 ***
## recruit_est -1.770e-08  2.871e-08  -0.616     0.54    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1847 on 67 degrees of freedom
## Multiple R-squared:  0.005639,   Adjusted R-squared:  -0.009203 
## F-statistic: 0.3799 on 1 and 67 DF,  p-value: 0.5397
cor.test(chl_spring$recruit_est, chl_spring$weighted_mean_chl, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl_spring$recruit_est and chl_spring$weighted_mean_chl
## t = -0.61639, df = 67, p-value = 0.5397
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3063278  0.1645132
## sample estimates:
##         cor 
## -0.07509132
ggscatter(chl_spring, x = "recruit_est", y = "weighted_mean_chl", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Spring Weighted Mean Chlorophyll",
          title="Spring")

## Summer
lm_summer<-lm(weighted_mean_chl ~ recruit_est, data=chl_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = weighted_mean_chl ~ recruit_est, data = chl_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14300 -0.06812 -0.01667  0.05040  0.26048 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.876e-01  2.241e-02  17.296   <2e-16 ***
## recruit_est -1.922e-08  1.381e-08  -1.392    0.169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08884 on 67 degrees of freedom
## Multiple R-squared:  0.02809,    Adjusted R-squared:  0.01358 
## F-statistic: 1.936 on 1 and 67 DF,  p-value: 0.1687
cor.test(chl_summer$recruit_est, chl_summer$weighted_mean_chl, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl_summer$recruit_est and chl_summer$weighted_mean_chl
## t = -1.3915, df = 67, p-value = 0.1687
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.38885569  0.07193415
## sample estimates:
##     cor 
## -0.1676
ggscatter(chl_summer, x = "recruit_est", y = "weighted_mean_chl", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean Chlorophyll",
          title="Summer")

## Fall
lm_fall<-lm(weighted_mean_chl ~ recruit_est, data=chl_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = weighted_mean_chl ~ recruit_est, data = chl_fall)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35212 -0.14826 -0.02091  0.12718  0.60037 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.674e-01  5.146e-02  12.969   <2e-16 ***
## recruit_est 3.414e-08  3.186e-08   1.071    0.288    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2039 on 66 degrees of freedom
## Multiple R-squared:  0.0171, Adjusted R-squared:  0.002205 
## F-statistic: 1.148 on 1 and 66 DF,  p-value: 0.2879
cor.test(chl_fall$recruit_est, chl_fall$weighted_mean_chl, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl_fall$recruit_est and chl_fall$weighted_mean_chl
## t = 1.0715, df = 66, p-value = 0.2879
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1111334  0.3580200
## sample estimates:
##       cor 
## 0.1307564
ggscatter(chl_fall, x = "recruit_est", y = "weighted_mean_chl", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Fall Weighted Mean Chlorophyll",
          title="Fall")

Updated CHL plots 12/19 - no lag

df = chl.rec %>% 
    mutate(season = case_when(month %in% c(1,2,3) ~ 'winter', 
                              month %in% c(4,5,6) ~ 'spring', 
                              month %in% c(7,8,9) ~ 'summer', 
                              month %in% c(10,11,12) ~ 'fall'))


df %>% group_by(year, season) %>% 
  summarize(chl = weighted_mean_chl,
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=chl, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(chl = weighted_mean_chl,
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=chl, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Updated CHL plots 12/19 - 1 year lag

df %>% group_by(year, season) %>% 
  summarize(chl = mean(chl_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=chl, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


df %>% group_by(year, season) %>% 
  summarize(chl = mean(chl_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=chl, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

SST Fronts
# SST fronts
fprob<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_3x3.csv')) #fprob across all individual strata
fprob.ind<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_indv_substrata_3x3.csv')) #mean for each of 14 substrata
fprob.ns<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_n_v_s_substrata_3x3.csv')) #mean for substrata N and S of Hudson canyon

# fprob with year lag
fprob.lag <- fprob %>%
  mutate(fprob_lag1 = lag(weighted_mean_fprob,4))
         
# Join with recruit estimate
fprob.rec <- dplyr::full_join(recruit, fprob.lag %>%
                   group_by(year) %>% 
                   filter(year %in% c(1998:2020)),
dplyr::select(year, season, recruit_est, mean_fprob, weighted_mean_fprob, fprob_lag1),
                 by = join_by(year))
fprob.rec <- fprob.rec[-c(1:29),] #removes year < 2000 (starting year for fprob)
Maps (SST fronts)
SST Fronts analysis
## Across all strata

#by season
fprob_winter <- filter(fprob.rec, season %in% c("winter"))
#fprob_winter <- fprob_winter[-c(18),] # removes outlier

fprob_spring <- filter(fprob.rec, season %in% c("spring"))
#fprob_spring <- fprob_spring[-c(7),]

fprob_summer <- filter(fprob.rec, season %in% c("summer"))
#fprob_summer <- fprob_summer[-c(17),] #removes outlier

fprob_fall <- filter(fprob.rec, season %in% c("fall"))
#fprob_fall <- fprob_fall[-c(62),] #removes outlier

## One year lag - Summer (peak spawn)
lm_lag<-lm(fprob_lag1 ~ recruit_est, data=fprob_summer)
summary(lm_lag)
## 
## Call:
## lm(formula = fprob_lag1 ~ recruit_est, data = fprob_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07285 -0.02821  0.01042  0.03520  0.05229 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.074e-01  1.861e-02  43.393   <2e-16 ***
## recruit_est -1.501e-08  1.313e-08  -1.144    0.268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03817 on 18 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06773,    Adjusted R-squared:  0.01593 
## F-statistic: 1.308 on 1 and 18 DF,  p-value: 0.2678
cor.test(fprob_summer$recruit_est, fprob_summer$fprob_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob_summer$recruit_est and fprob_summer$fprob_lag1
## t = -1.1435, df = 18, p-value = 0.2678
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6301882  0.2060033
## sample estimates:
##        cor 
## -0.2602413
ggscatter(fprob_summer, x = "recruit_est", y = "fprob_lag1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean SST Frontal Probability",
          title="One Year Lag")

## Winter
lm_winter<-lm(weighted_mean_fprob ~ recruit_est, data=fprob_winter)
summary(lm_winter)
## 
## Call:
## lm(formula = weighted_mean_fprob ~ recruit_est, data = fprob_winter)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.089471 -0.042545 -0.007391  0.058791  0.095752 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.806e-01  2.720e-02  28.702   <2e-16 ***
## recruit_est -4.173e-09  1.786e-08  -0.234    0.818    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05943 on 19 degrees of freedom
## Multiple R-squared:  0.002863,   Adjusted R-squared:  -0.04962 
## F-statistic: 0.05456 on 1 and 19 DF,  p-value: 0.8178
cor.test(fprob_winter$recruit_est, fprob_winter$weighted_mean_fprob, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob_winter$recruit_est and fprob_winter$weighted_mean_fprob
## t = -0.23358, df = 19, p-value = 0.8178
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4742426  0.3871184
## sample estimates:
##         cor 
## -0.05351077
ggscatter(fprob_winter, x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Winter Weighted Mean SST Frontal Probability",
          title="Winter")

## Spring
lm_spring<-lm(weighted_mean_fprob ~ recruit_est, data=fprob_spring)
summary(lm_spring)
## 
## Call:
## lm(formula = weighted_mean_fprob ~ recruit_est, data = fprob_spring)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13127 -0.02817  0.01427  0.03149  0.09048 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 7.352e-01  2.774e-02  26.499   <2e-16 ***
## recruit_est 3.199e-08  1.822e-08   1.755   0.0953 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06062 on 19 degrees of freedom
## Multiple R-squared:  0.1395, Adjusted R-squared:  0.09423 
## F-statistic: 3.081 on 1 and 19 DF,  p-value: 0.09535
cor.test(fprob_spring$recruit_est, fprob_spring$weighted_mean_fprob, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob_spring$recruit_est and fprob_spring$weighted_mean_fprob
## t = 1.7551, df = 19, p-value = 0.09535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06935361  0.69339789
## sample estimates:
##       cor 
## 0.3735159
ggscatter(fprob_spring, x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Spring Weighted Mean SST Frontal Probability",
          title="Spring")

## Summer
lm_summer<-lm(weighted_mean_fprob ~ recruit_est, data=fprob_summer)
summary(lm_summer)
## 
## Call:
## lm(formula = weighted_mean_fprob ~ recruit_est, data = fprob_summer)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.06348 -0.03465  0.01292  0.03491  0.05230 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.885e-01  1.886e-02  41.804   <2e-16 ***
## recruit_est -2.364e-09  1.239e-08  -0.191    0.851    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04121 on 19 degrees of freedom
## Multiple R-squared:  0.001913,   Adjusted R-squared:  -0.05062 
## F-statistic: 0.03642 on 1 and 19 DF,  p-value: 0.8507
cor.test(fprob_summer$recruit_est, fprob_summer$weighted_mean_fprob, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob_summer$recruit_est and fprob_summer$weighted_mean_fprob
## t = -0.19084, df = 19, p-value = 0.8507
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4666157  0.3954134
## sample estimates:
##        cor 
## -0.0437395
ggscatter(fprob_summer, x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Summer Weighted Mean SST Frontal Probability",
          title="Summer")

## Fall
lm_fall<-lm(weighted_mean_fprob ~ recruit_est, data=fprob_fall)
summary(lm_fall)
## 
## Call:
## lm(formula = weighted_mean_fprob ~ recruit_est, data = fprob_fall)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.071677 -0.022670  0.000026  0.026553  0.074282 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.072e-01  1.929e-02  41.846   <2e-16 ***
## recruit_est -9.798e-09  1.267e-08  -0.773    0.449    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04215 on 19 degrees of freedom
## Multiple R-squared:  0.03051,    Adjusted R-squared:  -0.02051 
## F-statistic: 0.598 on 1 and 19 DF,  p-value: 0.4489
cor.test(fprob_fall$recruit_est, fprob_fall$weighted_mean_fprob, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob_fall$recruit_est and fprob_fall$weighted_mean_fprob
## t = -0.7733, df = 19, p-value = 0.4489
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5638485  0.2779676
## sample estimates:
##        cor 
## -0.1746798
ggscatter(fprob_fall, x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Fall Weighted Mean SST Frontal Probability",
          title="Fall")

Fprob of each substrata

#join data
ind.rec <- dplyr::full_join(recruit, fprob.ind %>%
                   group_by(year) %>% 
                   filter(year %in% c(1998:2020)),
dplyr::select(year, season, recruit_est, mean_fprob, weighted_mean_fprob, fprob_lag1),
                 by = join_by(year))
ind.rec <- ind.rec[-c(1:29),]  #begin at year 2000

#correlation plot
ggscatter(ind.rec, x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

# Winter
ggscatter(ind.rec %>%
            filter(season == "winter"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

# Spring
ggscatter(ind.rec %>%
            filter(season == "spring"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

# Summer
ggscatter(ind.rec %>%
            filter(season == "summer"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

# Fall
ggscatter(ind.rec %>%
            filter(season == "fall"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

Fprob N/S of Hudson Canyon

#join data
ns.rec <- dplyr::full_join(recruit, fprob.ns %>%
                   group_by(year) %>% 
                   filter(year %in% c(1998:2020)),
dplyr::select(year, season, recruit_est, mean_fprob, weighted_mean_fprob, fprob_lag1),
                 by = join_by(year))
ns.rec <- ns.rec[-c(1:29),]  #begin at year 2000

# Winter
ggscatter(ns.rec %>%
            filter(season == "winter"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~substrat) +
  theme_facet()

# Spring
ggscatter(ns.rec %>%
            filter(season == "spring"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~substrat) +
  theme_facet()

# Summer
ggscatter(ns.rec %>%
            filter(season == "summer"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~substrat) +
  theme_facet()

# Fall
ggscatter(ns.rec %>%
            filter(season == "fall"), 
          x = "recruit_est", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Recruitment Estimate", ylab = "Mean SST frontal probability") +
  facet_wrap(~substrat) +
  theme_facet()

Updated plots 12/19 - Fprob no lag

fprob.rec %>% group_by(year, season) %>% 
  summarize(fprob = weighted_mean_fprob,
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=fprob, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


fprob.rec %>% group_by(year, season) %>% 
  summarize(fprob = weighted_mean_fprob,
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=fprob, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Recruit year (no lag)') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

Updated plots 12/19 - Fprob 1 year lag

fprob.rec %>% group_by(year, season) %>% 
  summarize(fprob = mean(fprob_lag1),
            r = mean(recruit_est)) %>% 
ggplot2::ggplot(aes(x=r, y=fprob, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  theme_bw() #+ 

  # facet_wrap(~season)


fprob.rec %>% group_by(year, season) %>% 
  summarize(fprob = mean(fprob_lag1),
            r = mean(recruit_est)) %>% 
  ggplot2::ggplot(aes(x=fprob, y=r, color = factor(season))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
              aes(colour = factor(season)))  +
  labs(color = 'Season', title = 'Lag 1 year') +
  stat_cor(aes(label=..rr.label..)) +
  stat_cor(aes(label=..p.label..)) +
  theme_bw() + 
facet_wrap(~season, scales="free")

GAMs
## habitat (sst/bt/sal?)
habitat1 <- dplyr::full_join(
  sst.rec, bt.rec, by = join_by(year, month, recruit_est)) %>%
  dplyr::select(year, month, recruit_est, weighted_mean_sst, weighted_mean_bt)

habitat <- dplyr::full_join(
  habitat1, sal.rec, by = join_by(year, month, recruit_est)) %>%
  dplyr::select(year, month, recruit_est, weighted_mean_sst, weighted_mean_bt, weighted_mean_sal_78m)

habitat.gam <- gam(recruit_est ~ s(weighted_mean_sst, k=10) + s(weighted_mean_bt, k=10) + s(weighted_mean_sal_78m, k=10), data=habitat, method="REML")
summary(habitat.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## recruit_est ~ s(weighted_mean_sst, k = 10) + s(weighted_mean_bt, 
##     k = 10) + s(weighted_mean_sal_78m, k = 10)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1336701      45245   29.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df     F  p-value    
## s(weighted_mean_sst)     1.476  1.806 1.572  0.30718    
## s(weighted_mean_bt)      1.532  1.915 6.101  0.00213 ** 
## s(weighted_mean_sal_78m) 4.205  5.211 5.825 3.71e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.106   Deviance explained = 13.3%
## -REML = 3510.6  Scale est. = 4.8925e+11  n = 239
plot(habitat.gam, seWithMean=TRUE)

##currents (shelfvol/gsi)
currents <- dplyr::full_join(
  df.join.shlf, df.gsi, by = join_by(year, month, recruit_est)) %>%
  dplyr::select(year,month, recruit_est, mean_v, GSI)
#no gsi <1998, no shlfvol 2020 >

currents.gam <- gam(recruit_est ~ s(mean_v, k=10) + s(GSI, k=10), data=currents, method="REML")
summary(currents.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## recruit_est ~ s(mean_v, k = 10) + s(GSI, k = 10)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1425673      72833   19.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value
## s(mean_v) 2.234  2.812 1.680   0.255
## s(GSI)    1.002  1.004 0.417   0.520
## 
## R-sq.(adj) =  0.0305   Deviance explained =  5.9%
## -REML = 1624.6  Scale est. = 5.8882e+11  n = 111
plot(currents.gam, seWithMean=TRUE)

## food availability (micro/chl-a/sstfronts)
food1<- dplyr::full_join(
  micro.rec, chl.rec, by = join_by(year, month, recruit_est)) %>%
  dplyr::select(year,month, recruit_est, weighted_mean_micro, weighted_mean_chl)

food<-read.csv(here::here('data/food_rec.csv')) #join with fprob and grouped by season

food.gam <- gam(recruit_est ~ s(weighted_mean_micro, k=10) + s(weighted_mean_chl, k=10) + s(weighted_mean_fprob, k=10), data=food, method="REML")
summary(food.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## recruit_est ~ s(weighted_mean_micro, k = 10) + s(weighted_mean_chl, 
##     k = 10) + s(weighted_mean_fprob, k = 10)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1338230      44632   29.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                          edf Ref.df     F p-value  
## s(weighted_mean_micro) 1.001  1.002 0.281  0.5969  
## s(weighted_mean_chl)   1.003  1.006 0.057  0.8159  
## s(weighted_mean_fprob) 5.188  6.311 2.527  0.0332 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0512   Deviance explained = 7.84%
## -REML = 3707.8  Scale est. = 5.0199e+11  n = 252
plot(food.gam, seWithMean=TRUE)

References:

Joyce, Terrence M, Young-Oh Kwon, Hyodae Seo, and Caroline C Ummenhofer. 2019. “Meridional Gulf Stream Shifts Can Influence Wintertime Variability in the North Atlantic Storm Track and Greenland Blocking.” Geophysical Research Letters 46 (3): 1702–8. https://doi.org/10.1029/2018GL081087.

Hobbs, L., Banas, N. S., Cottier, F. R., Berge, J., & Daase, M. (2020). Eat or sleep: availability of winter prey explains mid-winter and spring activity in an Arctic Calanus population. Frontiers in Marine Science, 7, 541564.

Pérez-Hernández, M. Dolores, and Terrence M. Joyce. 2014. “Two Modes of Gulf Stream Variability Revealed in the Last Two Decades of Satellite Altimeter Data.” Journal of Physical Oceanography 44 (1): 149–63. https://doi.org/10.1175/JPO-D-13-0136.1.